{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "dd821a8d",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/vector_stores/QdrantIndexDemo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "307804a3-c02b-4a57-ac0d-172c30ddc851",
   "metadata": {},
   "source": [
    "# Qdrant Vector Store"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f7010b1d-d1bb-4f08-9309-a328bb4ea396",
   "metadata": {},
   "source": [
    "#### Creating a Qdrant client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0dc39c0b",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-vector-stores-qdrant llama-index-readers-file llama-index-embeddings-fastembed llama-index-llms-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5527d3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "import os\n",
    "\n",
    "import qdrant_client\n",
    "from IPython.display import Markdown, display\n",
    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
    "from llama_index.core import StorageContext\n",
    "from llama_index.vector_stores.qdrant import QdrantVectorStore\n",
    "from llama_index.embeddings.fastembed import FastEmbedEmbedding\n",
    "from llama_index.core import Settings\n",
    "\n",
    "Settings.embed_model = FastEmbedEmbedding(model_name=\"BAAI/bge-base-en-v1.5\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "07489add",
   "metadata": {},
   "source": [
    "If running for the first, time, install the dependencies using:\n",
    "\n",
    "```\n",
    "!pip install -U qdrant_client fastembed\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "wGLkbqIm4XIe",
   "metadata": {},
   "source": [
    "Set your OpenAI key for authenticating the LLM"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e199eb1b",
   "metadata": {},
   "source": [
    "Follow these set the OpenAI API key to the OPENAI_API_KEY environment variable - "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5d27466",
   "metadata": {},
   "source": [
    "1. Using Terminal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f77cce7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "export OPENAI_API_KEY=your_api_key_here"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a772e25",
   "metadata": {},
   "source": [
    "2. Using IPython Magic Command in Jupyter Notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "uI25Wj6x4SrT",
   "metadata": {},
   "outputs": [],
   "source": [
    "%env OPENAI_API_KEY=<YOUR_OPENAI_API_KEY>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0d84caf",
   "metadata": {},
   "source": [
    "3. Using Python Script"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28329227",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"your_api_key_here\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f8b6361e",
   "metadata": {},
   "source": [
    "Note: It's generally recommended to set sensitive information like API keys as environment variables rather than hardcoding them into scripts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c79df94",
   "metadata": {},
   "outputs": [],
   "source": [
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c475d645",
   "metadata": {},
   "source": [
    "Download Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d3e939d",
   "metadata": {},
   "outputs": [],
   "source": [
    "!mkdir -p 'data/paul_graham/'\n",
    "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01c44da1",
   "metadata": {},
   "source": [
    "#### Load the documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7cbe1384",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load documents\n",
    "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8ee4473a-094f-4d0a-a825-e1213db07240",
   "metadata": {},
   "source": [
    "#### Build the VectorStoreIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b873b936",
   "metadata": {},
   "outputs": [],
   "source": [
    "client = qdrant_client.QdrantClient(\n",
    "    # you can use :memory: mode for fast and light-weight experiments,\n",
    "    # it does not require to have Qdrant deployed anywhere\n",
    "    # but requires qdrant-client >= 1.1.1\n",
    "    # location=\":memory:\"\n",
    "    # otherwise set Qdrant instance address with:\n",
    "    # url=\"http://<host>:<port>\"\n",
    "    # otherwise set Qdrant instance with host and port:\n",
    "    host=\"localhost\",\n",
    "    port=6333\n",
    "    # set API KEY for Qdrant Cloud\n",
    "    # api_key=\"<qdrant-api-key>\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba1558b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "vector_store = QdrantVectorStore(client=client, collection_name=\"paul_graham\")\n",
    "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
    "index = VectorStoreIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=storage_context,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "36862545",
   "metadata": {},
   "source": [
    "#### Query Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7bf3e2b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# set Logging to DEBUG for more detailed outputs\n",
    "query_engine = index.as_query_engine()\n",
    "response = query_engine.query(\"What did the author do growing up?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77a864c7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>The author worked on writing and programming before college.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64e35c1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# set Logging to DEBUG for more detailed outputs\n",
    "query_engine = index.as_query_engine()\n",
    "response = query_engine.query(\n",
    "    \"What did the author do after his time at Viaweb?\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "926b79da",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>The author arranged to do freelance work for a group that did projects for customers after his time at Viaweb.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7b4d27fc",
   "metadata": {},
   "source": [
    "#### Build the VectorStoreIndex asynchronously"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08af428d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# To connect to the same event-loop,\n",
    "# allows async events to run on notebook\n",
    "\n",
    "import nest_asyncio\n",
    "\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13fe7e09",
   "metadata": {},
   "outputs": [],
   "source": [
    "aclient = qdrant_client.AsyncQdrantClient(\n",
    "    # you can use :memory: mode for fast and light-weight experiments,\n",
    "    # it does not require to have Qdrant deployed anywhere\n",
    "    # but requires qdrant-client >= 1.1.1\n",
    "    location=\":memory:\"\n",
    "    # otherwise set Qdrant instance address with:\n",
    "    # uri=\"http://<host>:<port>\"\n",
    "    # set API KEY for Qdrant Cloud\n",
    "    # api_key=\"<qdrant-api-key>\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1918d705",
   "metadata": {},
   "outputs": [],
   "source": [
    "vector_store = QdrantVectorStore(\n",
    "    collection_name=\"paul_graham\",\n",
    "    client=client,\n",
    "    aclient=aclient,\n",
    "    prefer_grpc=True,\n",
    ")\n",
    "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
    "index = VectorStoreIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=storage_context,\n",
    "    use_async=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2d0401ab",
   "metadata": {},
   "source": [
    "#### Async Query Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88af9cf2",
   "metadata": {},
   "outputs": [],
   "source": [
    "query_engine = index.as_query_engine(use_async=True)\n",
    "response = await query_engine.aquery(\"What did the author do growing up?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c88ff475",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>The author worked on writing short stories and programming, particularly on an IBM 1401 computer in 9th grade using an early version of Fortran. Later, the author transitioned to working on microcomputers, starting with a TRS-80 in about 1980, where they wrote simple games, programs, and a word processor.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e8f1146",
   "metadata": {},
   "outputs": [],
   "source": [
    "# set Logging to DEBUG for more detailed outputs\n",
    "query_engine = index.as_query_engine(use_async=True)\n",
    "response = await query_engine.aquery(\n",
    "    \"What did the author do after his time at Viaweb?\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90e1fa0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>The author went on to co-found Y Combinator after his time at Viaweb.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "906b5a76",
   "metadata": {},
   "source": [
    "## Hybrid Search\n",
    "\n",
    "You can enable hybrid search when creating an qdrant index. Here, we use Qdrant's BM25 capabilities to quickly create a sparse and dense index for hybrid retrieval."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4a5699d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from qdrant_client import QdrantClient, AsyncQdrantClient\n",
    "from llama_index.core import VectorStoreIndex\n",
    "from llama_index.core import StorageContext\n",
    "from llama_index.vector_stores.qdrant import QdrantVectorStore\n",
    "\n",
    "client = QdrantClient(host=\"localhost\", port=6333)\n",
    "aclient = AsyncQdrantClient(host=\"localhost\", port=6333)\n",
    "\n",
    "vector_store = QdrantVectorStore(\n",
    "    client=client,\n",
    "    aclient=aclient,\n",
    "    collection_name=\"paul_graham_hybrid\",\n",
    "    enable_hybrid=True,\n",
    "    fastembed_sparse_model=\"Qdrant/bm25\",\n",
    ")\n",
    "\n",
    "index = VectorStoreIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=StorageContext.from_defaults(vector_store=vector_store),\n",
    ")\n",
    "\n",
    "# retrieve 2 sparse, 2 dense, and filter down to 3 total hybrid results\n",
    "query_engine = index.as_query_engine(\n",
    "    vector_store_query_mode=\"hybrid\",\n",
    "    sparse_top_k=2,\n",
    "    similarity_top_k=2,\n",
    "    hybrid_top_k=3,\n",
    ")\n",
    "\n",
    "response = query_engine.query(\"What did the author do growing up?\")\n",
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b65370a7",
   "metadata": {},
   "source": [
    "## Saving and Loading\n",
    "\n",
    "To restore an index, in most cases, you can just restore using the vector store object itself. The index is saved automatically by Qdrant."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45b7dd11",
   "metadata": {},
   "outputs": [],
   "source": [
    "loaded_index = VectorStoreIndex.from_vector_store(\n",
    "    vector_store,\n",
    "    # Embedding model should match the original embedding model\n",
    "    # embed_model=Settings.embed_model\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
